Import commonly used modules and objects


In [1]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

Let's look at some functionality of Series


In [2]:
# A Series built from a plain list gets the default integer index.
s = Series(data=[4, 7, -5, 3])

# The same values, this time with explicit string labels.
s1 = Series(
    data=[4, 7, -5, 3],
    index=['d', 'b', 'a', 'c'],
)

# Different values on the same set of labels, listed in another order.
s2 = Series(
    data=[2, -9, 6, 7],
    index=['b', 'a', 'c', 'd'],
)

# A Series can also be built from a dict: the keys become the index labels.
data = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}
s3 = Series(data=data)

In [3]:
# The underlying data of the Series, exposed as a NumPy array.
s.values


Out[3]:
array([ 4,  7, -5,  3])

In [4]:
# The index labels; `s` was created without an index, so they are 0..3.
s.index


Out[4]:
Int64Index([0, 1, 2, 3], dtype='int64')

In [7]:
# Look up a single value by its index label; the result is a scalar.
s1['a']


Out[7]:
-5

In [6]:
# Overwrite the value stored under label 0. This mutates `s` in place, so the
# cells below operate on [9, 7, -5, 3].
# NOTE(review): this cell's execution count (6) is lower than that of the cell
# shown above it (7) - re-run the notebook top to bottom to confirm the order.
s[0] = 9

In [8]:
# Passing a list of labels returns a Series with those entries, in list order.
s1[['a', 'b', 'c']]


Out[8]:
a   -5
b    7
c    3
dtype: int64

In [9]:
# Element-wise comparison: yields a boolean Series with the same index.
s > 0


Out[9]:
0     True
1     True
2    False
3     True
dtype: bool

In [10]:
# Use the boolean Series as a mask: only the entries where it is True are
# kept, and they retain their original labels.
s[s > 0]


Out[10]:
0    9
1    7
3    3
dtype: int64

In [11]:
# Arithmetic with a scalar is applied to every element.
s * 2


Out[11]:
0    18
1    14
2   -10
3     6
dtype: int64

In [12]:
# NumPy functions work element-wise on a Series; the result is again a Series
# carrying the same index.
np.exp(s)


Out[12]:
0    8103.083928
1    1096.633158
2       0.006738
3      20.085537
dtype: float64

In [13]:
# `in` tests membership among the index labels, not among the values.
'a' in s1


Out[13]:
True

In [14]:
# Conform `s` to a new set of labels and rebind the name to the result.
# Label 4 has no existing value, so it is filled with NaN, which in turn
# makes the Series float64.
s = s.reindex([0, 1, 2, 3, 4])

In [15]:
# Display the reindexed Series.
s


Out[15]:
0     9
1     7
2    -5
3     3
4   NaN
dtype: float64

In [16]:
# Boolean Series that is True wherever a value is missing (NaN).
s.isnull()


Out[16]:
0    False
1    False
2    False
3    False
4     True
dtype: bool

In [17]:
# Arithmetic between two Series aligns on index labels, not on position:
# e.g. 'a' is -5 + -9 even though the labels are ordered differently.
s1 + s2


Out[17]:
a   -14
b     9
c     9
d    11
dtype: int64

In [18]:
# Multiplication is label-aligned in the same way as addition.
s1 * s2


Out[18]:
a    45
b    14
c    18
d    28
dtype: int64

In [19]:
# Label-aligned division; the result is float64 even for integer inputs.
s1 / s2


Out[19]:
a    0.555556
b    3.500000
c    0.500000
d    0.571429
dtype: float64

Now let's look at some DataFrame functionality


In [20]:
# Column-oriented input: every key becomes a column, every list its values.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}

# Row labels, one per record.
index = ['one', 'two', 'three', 'four', 'five']

df = DataFrame(data=data, index=index)

In [21]:
# Display the whole frame (it is only five rows).
df


Out[21]:
pop state year
one 1.5 Ohio 2000
two 1.7 Ohio 2001
three 3.6 Ohio 2002
four 2.4 Nevada 2001
five 2.9 Nevada 2002

In [22]:
# The column labels of the frame, returned as an Index.
df.columns


Out[22]:
Index([u'pop', u'state', u'year'], dtype='object')

In [23]:
# Selecting one column by name returns a Series that shares the frame's row
# index and is named after the column.
df['pop']


Out[23]:
one      1.5
two      1.7
three    3.6
four     2.4
five     2.9
Name: pop, dtype: float64

In [24]:
# A list of column names returns a DataFrame with those columns, in list order.
df[['state', 'pop']]


Out[24]:
state pop
one Ohio 1.5
two Ohio 1.7
three Ohio 3.6
four Nevada 2.4
five Nevada 2.9

In [25]:
# Select a single row by its index label; the row comes back as a Series whose
# index is the frame's column names.
# `.loc` is the label-based indexer. The older `.ix` indexer was deprecated and
# has been removed from pandas, so it fails on current versions.
df.loc['one']


Out[25]:
pop       1.5
state    Ohio
year     2000
Name: one, dtype: object

In [26]:
# Assigning to a new column name adds that column to `df` in place.
# np.arange(5.) supplies the floats 0..4, one per row.
df['debt'] = np.arange(5.)
# Display the frame with the new column.
df


Out[26]:
pop state year debt
one 1.5 Ohio 2000 0
two 1.7 Ohio 2001 1
three 3.6 Ohio 2002 2
four 2.4 Nevada 2001 3
five 2.9 Nevada 2002 4

In [27]:
# Drop the row labelled 'five'. The result is a new frame; `df` itself is left
# unchanged (the row is still present in the cells below).
df.drop('five')


Out[27]:
pop state year debt
one 1.5 Ohio 2000 0
two 1.7 Ohio 2001 1
three 3.6 Ohio 2002 2
four 2.4 Nevada 2001 3

In [28]:
# axis=1 makes drop() look for the label among the columns instead of the rows.
# Again a new frame is returned and `df` keeps its 'debt' column.
df.drop('debt', axis=1)


Out[28]:
pop state year
one 1.5 Ohio 2000
two 1.7 Ohio 2001
three 3.6 Ohio 2002
four 2.4 Nevada 2001
five 2.9 Nevada 2002

In [29]:
# Element-wise comparison on one column: a boolean Series indexed by row label.
df['pop'] > 2


Out[29]:
one      False
two      False
three     True
four      True
five      True
Name: pop, dtype: bool

In [30]:
# Indexing the frame with a boolean Series keeps only the rows where it is True.
df[df['pop'] > 2]


Out[30]:
pop state year debt
three 3.6 Ohio 2002 2
four 2.4 Nevada 2001 3
five 2.9 Nevada 2002 4